
#####
# Replication code for "Age-Group Identity and Political Participation"
# Sam Trachtman, Sarah Anzia, and Charlotte Hill 
# Research & Politics
#####

rm(list=ls())
setwd("C:/Users/sam.trachtman/Dropbox/age_climate_shared/replication") #adjust.. 

#libraries 
library(rio)
library(ggplot2)
library(psych)
library(gridExtra)
library(stargazer)
library(dplyr)
library(estimatr)
library(nFactors)
library(survey)
library(tidyverse)

#load data
df <- import("df.csv")

###################################
#Dimension reduce to generate key age-id measures 
###################################

samp = subset(df, select = c("common","insult", "interest", "praise", "connect","id"))
samp = na.omit(samp)

#number of factors
corr = cor(samp[,1:ncol(samp)-1])
ev <- eigen(cor(corr))
nScree(x= corr)

#factor analysis
factors <- fa(r = corr, nfactors = 1, scores = "regression")
factors

#inspecting results
factors$values
factors$communality
factors$loadings #Table A1 
samp$score = factor.scores(samp[,1:ncol(samp)-1], factors)$scores

#generating age-id using simple summated ratings
samp$age.summated = with(samp,
                         (common + insult + interest + praise + connect)/5)

#scaling
samp$age.score <- (samp$score - min(samp$score))/(max(samp$score)-min(samp$score))
samp$age.score.summated <- (samp$age.summated - min(samp$age.summated))/(max(samp$age.summated)-min(samp$age.summated))

#merging
samp <- subset(samp, select = c("id","age.score", "age.score.summated"))
df <- left_join(df, samp, by = c("id"))

###################################
#Dimension reduce to generate strength of PID measures 
###################################

samp = subset(df, select = c("common.pty","insult.pty", "interest.pty", "praise.pty", "connect.pty","id"))
samp = na.omit(samp)

#factor analysis
corr = cor(samp[,1:ncol(samp)-1])
factors <- fa(r = corr, nfactors = 1, scores = "regression")

#inspecting
factors$values
factors$communality
factors$loadings
samp$score = factor.scores(samp[,1:ncol(samp)-1], factors)$scores

#summated ratings 
samp$pty.summated = with(samp,
                         (common.pty + insult.pty + interest.pty + praise.pty + connect.pty)/5)

#scaling
samp$pty.score <- (samp$score - min(samp$score))/(max(samp$score)-min(samp$score))
samp$pty.score.summated <- (samp$pty.summated - min(samp$pty.summated))/(max(samp$pty.summated)-min(samp$pty.summated))

#merging
samp <- subset(samp, select = c("id","pty.score", "pty.score.summated"))
df <- left_join(df, samp, by = c("id"))

#counter
df$counter = 1


###################################
#Age-group identity descriptives
###################################

#fig1-- age id factor analysis
fig1 <- ggplot(data = df, aes(x=age, y= age.score))+
  geom_smooth(method="loess")+
  coord_cartesian(ylim=c(.5,.7))+
  theme_bw()+
  theme(text = element_text(size=14))+
  labs(x = "Age", y = "Age-group identity (0-1 scale)")
fig1
ggsave("figures/fig1.png", fig1, width=6, height=4)

#fig2-- age id and pid summated ratings
fig2 <- ggplot(data = subset(df, pid7!=4), aes(x=age))+
  geom_smooth(aes(y=age.score.summated), method="loess", col = "blue", lty=1)+
  geom_smooth(aes(y=pty.score.summated), method="loess", col = "dark grey", lty=2)+
  annotate("text", x=57, y=.65, label= "Partisan identity") +
  annotate("text", x=73, y=.55, label= "Age identity") +
  coord_cartesian(ylim=c(.5,.8))+
  theme_bw()+
  theme(text = element_text(size=14))+
  labs(x = "Age", y = "Summated ratings scale (0-1)")
fig2
ggsave("figures/fig2.png", fig2, width=6, height=4)


#fig A3-- linked fate continuous descriptive
figA3 <- ggplot(data = df, aes(x=age, y= link_str2))+
  geom_smooth(method="loess")+
  theme_bw()+
  coord_cartesian(ylim=c(1,2))+
  labs(x = "Age", y = "Age-based linked fate (0-3 scale)")+
  theme(text = element_text(size=14))
figA3
ggsave("figures/figA3.png", figA3, width=6, height=4)


###########################################
#Fig A1-- age identity plots by 4 bandwidths using svy weights
###########################################
design <- svydesign(id            = ~id,
                    weights = ~weight,
                    nest    = TRUE,
                    data    = df)
design.18.39 <- svydesign(id            = ~id,
                          weights = ~weight,
                          nest    = TRUE,
                          data    = subset(df, interval2=="18-39"))
design.40.54 <- svydesign(id            = ~id,
                          weights = ~weight,
                          nest    = TRUE,
                          data    = subset(df, interval2=="40-54"))
design.55.73 <- svydesign(id            = ~id,
                          weights = ~weight,
                          nest    = TRUE,
                          data    = subset(df, interval2=="55-73"))
design.74.plus <- svydesign(id            = ~id,
                            weights = ~weight,
                            nest    = TRUE,
                            data    = subset(df, interval2=="over_74"))

svy.all <- svymean(~age.score, design, na.rm = TRUE)
svy.18.39 <- svymean(~age.score, design.18.39,  na.rm = TRUE)
svy.40.54 <- svymean(~age.score, design.40.54,  na.rm = TRUE)
svy.55.73 <- svymean(~age.score, design.55.73,  na.rm = TRUE)
svy.over.74 <- svymean(~age.score, design.74.plus,  na.rm = TRUE)

mean <- c(svy.18.39[1], svy.40.54[1], svy.55.73[1], svy.over.74[1])
se <- c(.0067, .0123, .0102, .0104)
interval <- c("18-39","40-54","55-73","Over 73")
plotdata <- data.frame(mean, se, interval)

figA1 <- ggplot(data=plotdata,aes(x=interval))+
  geom_point(aes(y=mean), size=3)+
  geom_linerange(aes(ymin=mean-1.96*se, ymax=mean+1.96*se), linetype = 1, size = 1)+
  theme_bw()+
  labs(x = "", y = "Mean age-based identity (0-1)")+
  theme(axis.text.x = element_text(size  = 12, angle = 45, hjust = 1, vjust = 1))+
  scale_color_manual(values=c('black','grey'))+
  theme(text = element_text(size=14))+
  coord_cartesian(ylim=c(.5,.8))
figA1
ggsave("figures/figA1.png", figA1, width=6, height=4)


#####################################
#Age-group identity and political participation analysis 
#######################################

#mean center and normalize
df$age.01 <- (df$age - min(df$age))/(max(df$age)-min(df$age)) 
df$age.mc <- df$age.01 - mean(df$age.01)

df$pid7.01 <- (df$pid7 - min(df$pid7))/(max(df$pid7)-min(df$pid7))
df$pid7.mc <- df$pid7.01 - mean(df$pid7.01)

df$age.score.mc <- df$age.score - mean(df$age.score,na.rm=T)
df$pty.score.mc <- df$pty.score - mean(df$pty.score,na.rm=T)
df$link_str2.mc <- df$link_str2 - mean(df$link_str2,na.rm=T)

#interaction terms
df$interact <- df$age.score.mc*df$age.mc
df$interact_pty <- df$pty.score.mc*df$pid7.mc
df$interactlf <- df$link_str2*df$age.mc

#Table 1 regressions 
#age identity and voting
idreg1 <- lm(vote2018 ~ age.score.mc, data = df)
idreg2 <- lm(vote2018 ~ age.score.mc + age.mc + interact, data = df)
idreg3 <- lm(vote2018 ~ age.score.mc + age.mc + interact + pid7 + white + black + male + bach_degree + church + unemp, data = df)

#age identity and climate action
idrega1 <- lm(climate_action ~ age.score.mc, data = df)
idrega2 <- lm(climate_action ~ age.score.mc + age.mc + interact, data = df)
idrega3 <- lm(climate_action ~ age.score.mc + age.mc + interact + pid7 + white + black + male + bach_degree + church + unemp, data = df)

#constructing Table 1
cov.lab = c("Age identity","Age","Age identity * Age", "PID", "White", "Black", "Male", "Bachelors degree", "Church-going", "Out of work")
stargazer(idreg1, idreg2, idreg3, idrega1, idrega2, idrega3,
          se = starprep(idreg1, idreg2, idreg3, idrega1, idrega2, idrega3),
          no.space = T, title = "",
          digits = 2,
          covariate.labels=cov.lab,
          #order = c(1,2,8,3,4,5,6,7),
          dep.var.caption  = "",
          dep.var.labels.include = FALSE,
          keep.stat=c("n","rsq"),
          label = "",
          omit = c("time","Constant")
)

#table A5 regressions 
#linked fate and voting 
lfreg1 <- lm(vote2018 ~ link_str2.mc, data = df)
lfreg2 <- lm(vote2018 ~ link_str2.mc + age.mc + interactlf, data = df)
lfreg3 <- lm(vote2018 ~ link_str2.mc + age.mc + interactlf + pid7 + white + black + male + bach_degree + church + unemp, data = df)

#linked fate and climate action
lfrega1 <- lm(climate_action ~ link_str2.mc, data = df)
lfrega2 <- lm(climate_action ~ link_str2.mc + age.mc + interactlf, data = df)
lfrega3 <- lm(climate_action ~ link_str2.mc + age.mc + interactlf + pid7 + white + black + male + bach_degree + church + unemp, data = df)

#constructing Table A5
stargazer(lfreg1, lfreg2, lfreg3, lfrega1, lfrega2, lfrega3,
          se = starprep(lfreg1, lfreg2, lfreg3, lfrega1, lfrega2, lfrega3),
          no.space = T, title = "",
          digits = 2,
          covariate.labels=cov.lab,
          #order = c(1,2,8,3,4,5,6,7),
          dep.var.caption  = "",
          dep.var.labels.include = FALSE,
          keep.stat=c("n","rsq"),
          label = "",
          omit = c("time","Constant")
)

####################################################
# Age ID and participation by 10 year age interval 
#################################################

#construct fig3, regressions by 10 year intervals with covariate adjustment 
#left side of fig3
vote.out <- df %>% 
  filter(interval != "" & interval != "90-99") %>%
  nest(-interval) %>% #key step here is basically "nesting" in these groups
  mutate(fit = map(data, ~ lm(vote2018 ~ age.score + pid7 + ideo52 + white + black + male + bach_degree + church + unemp, data=.)), #then, within groups, run regressions
         results   = map(fit, tidy)) %>% #glancing at the fit 
  unnest(results) %>% #then unnest
  filter(term=="age.score") %>%
  arrange(., interval) %>%
  mutate(lower.conf = estimate-1.96*std.error,
         upper.conf = estimate+1.96*std.error)
vote.out$x = c(1:7)
interval <- c("20-29", "30-39","40-49","50-59","60-69","70-79","80-89")

coef_vote_by_age_covs <- ggplot(data=vote.out,aes(x=x))+
  geom_point(aes(y=estimate), size=3) +
  geom_linerange(aes(ymin=lower.conf, ymax=upper.conf), size=1)+
  theme_bw()+
  geom_hline(yintercept = 0, linetype = "dashed")+
  scale_color_manual(values=c('blue','grey','red'))+
  labs(x = "", y = "OLS coefficient (with controls)", title = "Voting")+
  ylim(-1,1)+
  theme(text = element_text(size=12))+
  theme(axis.text.x = element_text(size  = 12, angle = 45, hjust = 1, vjust = 1))+
  scale_x_continuous(breaks = 1:7, labels = interval)+
  theme(legend.position="bottom")
coef_vote_by_age_covs

#right side of fig3
action.out <- df %>% 
  filter(interval != "" & interval != "90-99") %>%
  nest(-interval) %>% #key step here is basically "nesting" in these groups
  mutate(fit = map(data, ~ lm(climate_action ~ age.score + pid7 + ideo52 + white + black + male + bach_degree + church + unemp, data=.)), #then, within groups, run regressions
         results   = map(fit, tidy)) %>% #glancing at the fit 
  unnest(results) %>% #then unnest
  filter(term=="age.score") %>%
  arrange(., interval) %>%
  mutate(lower.conf = estimate-1.96*std.error,
         upper.conf = estimate+1.96*std.error)
action.out$x = c(1:7)
interval <- c("20-29", "30-39","40-49","50-59","60-69","70-79","80-89")

coef_action_by_age_covs <- ggplot(data=action.out,aes(x=x))+
  geom_point(aes(y=estimate), size=3) +
  geom_linerange(aes(ymin=lower.conf, ymax=upper.conf), size=1)+
  theme_bw()+
  geom_hline(yintercept = 0, linetype = "dashed")+
  scale_color_manual(values=c('blue','grey','red'))+
  labs(x = "", y = "", title = "Climate protest")+
  ylim(-1,1)+
  theme(text = element_text(size=12))+
  theme(axis.text.x = element_text(size  = 12, angle = 45, hjust = 1, vjust = 1))+
  scale_x_continuous(breaks = 1:7, labels = interval)+
  theme(legend.position="bottom")
coef_action_by_age_covs

fig3 <- grid.arrange(coef_vote_by_age_covs, coef_action_by_age_covs, ncol=2)
ggsave("figures/fig3.png", fig3, width=8, height=5)


#construct figA4, regressions by 10 year intervals without covariate adjustment 
#left side of fig A4
vote.out <- df %>%
  filter(interval != "" & interval != "90-99") %>%
  nest(-interval) %>%
  mutate(fit = map(data, ~ lm(vote2018 ~ age.score, data=.)),
         results   = map(fit, tidy)) %>%
  unnest(results) %>%
  filter(term=="age.score") %>%
  arrange(., interval) %>%
  mutate(lower.conf = estimate-1.96*std.error,
         upper.conf = estimate+1.96*std.error)
vote.out$x = c(1:7)
interval <- c("20-29", "30-39","40-49","50-59","60-69","70-79","80-89")

coef_vote_by_age <- ggplot(data=vote.out,aes(x=x))+
  geom_point(aes(y=estimate), size=3) +
  geom_linerange(aes(ymin=lower.conf, ymax=upper.conf), size=1)+
  theme_bw()+
  geom_hline(yintercept = 0, linetype = "dashed")+
  scale_color_manual(values=c('blue','grey','red'))+
  labs(x = "", y = "Coefficient from bivariate OLS", title = "Voting")+
  ylim(-1,1)+
  theme(text = element_text(size=12))+
  theme(axis.text.x = element_text(size  = 12, angle = 45, hjust = 1, vjust = 1))+
  scale_x_continuous(breaks = 1:7, labels = interval)+
  theme(legend.position="bottom")
coef_vote_by_age

#right side of fig A4
action.out <- df %>%
  filter(interval != "" & interval != "90-99") %>%
  nest(-interval) %>%
  mutate(fit = map(data, ~ lm(climate_action ~ age.score, data=.)),
         results   = map(fit, tidy)) %>%
  unnest(results) %>%
  filter(term=="age.score") %>%
  arrange(., interval) %>%
  mutate(lower.conf = estimate-1.96*std.error,
         upper.conf = estimate+1.96*std.error)
action.out$x = c(1:7)
interval <- c("20-29", "30-39","40-49","50-59","60-69","70-79","80-89")

coef_action_by_age <- ggplot(data=action.out,aes(x=x))+
  geom_point(aes(y=estimate), size=3) +
  geom_linerange(aes(ymin=lower.conf, ymax=upper.conf), size=1)+
  theme_bw()+
  geom_hline(yintercept = 0, linetype = "dashed")+
  scale_color_manual(values=c('blue','grey','red'))+
  labs(x = "", y = "", title = "Climate protest")+
  ylim(-1,1)+
  theme(text = element_text(size=12))+
  theme(axis.text.x = element_text(size  = 12, angle = 45, hjust = 1, vjust = 1))+
  scale_x_continuous(breaks = 1:7, labels = interval)+
  theme(legend.position="bottom")
coef_action_by_age

#put figure A4 together 
figA4 <- grid.arrange(coef_vote_by_age, coef_action_by_age, ncol=2)
ggsave("figures/figA4.png", figA4, width=8, height=5)




####################################################
# Age ID and participation by 10 year age interval and PID (fig4)
#################################################


action.out <- df %>% 
  filter(pid %in% c("Democrat", "Independent", "Republican") & interval != "" & interval != "90-100") %>%
  nest(-pid, -interval) %>% #key step here is basically "nesting" in these groups
  mutate(fit = map(data, ~ lm(climate_action ~ age.score + pid7 + white + black + male + educ, data=.)), #then, within groups, run regressions
         results   = map(fit, tidy))%>% #glancing at the fit 
  unnest(results) %>% #then unnest
  filter(term=="age.score") %>%
  arrange(., interval, pid) %>%
  mutate(lower.conf = estimate-1.96*std.error,
         upper.conf = estimate+1.96*std.error)
action.out$x = c(.9,1,1.1,1.9,2,2.1,2.9,3,3.1,3.9,4,4.1,4.9,5,5.1, 5.9,6,6.1,6.9,7,7.1, 7.9, 8, 8.1)

#subsetting inds and reps out of older group due to lack of sample.
action.out <- action.out[c(1:12,13,16,19),]

fig4 <- ggplot(data=action.out,aes(x=x))+
  geom_point(aes(y=estimate, color=pid, shape = pid), size=3) +
  geom_linerange(aes(ymin=lower.conf, ymax=upper.conf, color=pid, linetype=pid), size=1)+
  theme_bw()+
  geom_hline(yintercept = 0, linetype = "dashed")+
  scale_color_manual(values=c('blue','grey','red'))+
  labs(x = "", y = "OLS coefficient (with controls)")+
  ylim(-2,2)+
  theme(text = element_text(size=12))+
  theme(axis.text.x = element_text(size  = 12, angle = 45, hjust = 1, vjust = 1))+
  scale_x_continuous(breaks = 1:7, labels = interval)+
  theme(legend.position="right", legend.title = element_blank())
fig4
ggsave("figures/fig4.png", fig4, width=8, height=5)


####################################################
# Age linked fate and participation by 10 year age interval (Fig A4)
#################################################

vote.out <- df %>% 
  filter(interval != "" & interval != "90-99") %>%
  nest(-interval) %>% #key step here is basically "nesting" in these groups
  mutate(fit = map(data, ~ lm(vote2018 ~ link_str2, data=.)), #then, within groups, run regressions
         results   = map(fit, tidy)) %>% #glancing at the fit 
  unnest(results) %>% #then unnest
  filter(term=="link_str2") %>%
  arrange(., interval) %>%
  mutate(lower.conf = estimate-1.96*std.error,
         upper.conf = estimate+1.96*std.error)
vote.out$x = c(1:7)
interval <- c("20-29", "30-39","40-49","50-59","60-69","70-79","80-89")

coef_vote_by_age <- ggplot(data=vote.out,aes(x=x))+
  geom_point(aes(y=estimate), size=3) +
  geom_linerange(aes(ymin=lower.conf, ymax=upper.conf), size=1)+
  theme_bw()+
  geom_hline(yintercept = 0, linetype = "dashed")+
  scale_color_manual(values=c('blue','grey','red'))+
  labs(x = "", y = "Coefficient from bivariate OLS", title = "Voting")+
  ylim(-.3,.3)+
  theme(text = element_text(size=12))+
  theme(axis.text.x = element_text(size  = 12, angle = 45, hjust = 1, vjust = 1))+
  scale_x_continuous(breaks = 1:7, labels = interval)+
  theme(legend.position="bottom")

action.out <- df %>% 
  filter(interval != "" & interval != "90-99") %>%
  nest(-interval) %>% 
  mutate(fit = map(data, ~ lm(climate_action ~ link_str2, data=.)), 
         results   = map(fit, tidy)) %>% 
  unnest(results) %>% 
  filter(term=="link_str2") %>%
  arrange(., interval) %>%
  mutate(lower.conf = estimate-1.96*std.error,
         upper.conf = estimate+1.96*std.error)
action.out$x = c(1:7)
interval <- c("20-29", "30-39","40-49","50-59","60-69","70-79","80-89")

coef_action_by_age <- ggplot(data=action.out,aes(x=x))+
  geom_point(aes(y=estimate), size=3) +
  geom_linerange(aes(ymin=lower.conf, ymax=upper.conf), size=1)+
  theme_bw()+
  geom_hline(yintercept = 0, linetype = "dashed")+
  scale_color_manual(values=c('blue','grey','red'))+
  labs(x = "", y = "", title = "Climate protest")+
  ylim(-.3,.3)+
  theme(text = element_text(size=12))+
  theme(axis.text.x = element_text(size  = 12, angle = 45, hjust = 1, vjust = 1))+
  scale_x_continuous(breaks = 1:7, labels = interval)+
  theme(legend.position="bottom")
coef_action_by_age

figA4 <- grid.arrange(coef_vote_by_age, coef_action_by_age, ncol=2)
ggsave("figures/figA4.png", figA4, width=8, height=5)

############################################
#Robustness check using only 'insult' question to measure age identity strength
############################################

df$insult.mc <- (df$insult - mean(df$insult,na.rm=T))/5
df$interact <- df$insult.mc*df$age.mc

#regressions
idreg1 <- lm(vote2018 ~ insult.mc, data = df)
idreg2 <- lm(vote2018 ~ insult.mc + age.mc + interact, data = df)
idreg3 <- lm(vote2018 ~ insult.mc + age.mc + interact + pid7 + white + black + male + bach_degree + church + unemp, data = df)

idrega1 <- lm(climate_action ~ insult.mc, data = df)
idrega2 <- lm(climate_action ~ insult.mc + age.mc + interact, data = df)
idrega3 <- lm(climate_action ~ insult.mc + age.mc + interact + pid7 + white + black + male + bach_degree + church + unemp, data = df)

#table A3
cov.lab = c("Age identity","Age","Age identity * Age", "PID", "White", "Black", "Male", "Bachelors degree", "Church-going", "Out of work")
stargazer(idreg1, idreg2, idreg3, idrega1, idrega2, idrega3,
          se = starprep(idreg1, idreg2, idreg3, idrega1, idrega2, idrega3),
          no.space = T, title = "",
          digits = 2,
          covariate.labels=cov.lab,
          dep.var.caption  = "",
          dep.var.labels.include = FALSE,
          keep.stat=c("n","rsq"),
          label = "",
          omit = c("time","Constant")
)


#############################################
#Correlation between age identity measure and other pro-social measures on survey (Table A4)
#############################################

cor(df$age.score, df$compete, use = "complete.obs")
cor(df$age.score, df$individualism, use = "complete.obs")



